From TidyTuesday URL:https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-07-07
summary(coffee_ratings)
## total_cup_points species owner country_of_origin
## Min. : 0.00 Length:1339 Length:1339 Length:1339
## 1st Qu.:81.08 Class :character Class :character Class :character
## Median :82.50 Mode :character Mode :character Mode :character
## Mean :82.09
## 3rd Qu.:83.67
## Max. :90.58
##
## farm_name lot_number mill ico_number
## Length:1339 Length:1339 Length:1339 Length:1339
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## company altitude region producer
## Length:1339 Length:1339 Length:1339 Length:1339
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## number_of_bags bag_weight in_country_partner harvest_year
## Min. : 0.0 Length:1339 Length:1339 Length:1339
## 1st Qu.: 14.0 Class :character Class :character Class :character
## Median : 175.0 Mode :character Mode :character Mode :character
## Mean : 154.2
## 3rd Qu.: 275.0
## Max. :1062.0
##
## grading_date owner_1 variety processing_method
## Length:1339 Length:1339 Length:1339 Length:1339
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## aroma flavor aftertaste acidity body
## Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:7.420 1st Qu.:7.33 1st Qu.:7.250 1st Qu.:7.330 1st Qu.:7.330
## Median :7.580 Median :7.58 Median :7.420 Median :7.580 Median :7.500
## Mean :7.567 Mean :7.52 Mean :7.401 Mean :7.536 Mean :7.517
## 3rd Qu.:7.750 3rd Qu.:7.75 3rd Qu.:7.580 3rd Qu.:7.750 3rd Qu.:7.670
## Max. :8.750 Max. :8.83 Max. :8.670 Max. :8.750 Max. :8.580
##
## balance uniformity clean_cup sweetness
## Min. :0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:7.330 1st Qu.:10.000 1st Qu.:10.000 1st Qu.:10.000
## Median :7.500 Median :10.000 Median :10.000 Median :10.000
## Mean :7.518 Mean : 9.835 Mean : 9.835 Mean : 9.857
## 3rd Qu.:7.750 3rd Qu.:10.000 3rd Qu.:10.000 3rd Qu.:10.000
## Max. :8.750 Max. :10.000 Max. :10.000 Max. :10.000
##
## cupper_points moisture category_one_defects quakers
## Min. : 0.000 Min. :0.00000 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 7.250 1st Qu.:0.09000 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 7.500 Median :0.11000 Median : 0.0000 Median : 0.0000
## Mean : 7.503 Mean :0.08838 Mean : 0.4795 Mean : 0.1734
## 3rd Qu.: 7.750 3rd Qu.:0.12000 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :10.000 Max. :0.28000 Max. :63.0000 Max. :11.0000
## NA's :1
## color category_two_defects expiration certification_body
## Length:1339 Min. : 0.000 Length:1339 Length:1339
## Class :character 1st Qu.: 0.000 Class :character Class :character
## Mode :character Median : 2.000 Mode :character Mode :character
## Mean : 3.556
## 3rd Qu.: 4.000
## Max. :55.000
##
## certification_address certification_contact unit_of_measurement
## Length:1339 Length:1339 Length:1339
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## altitude_low_meters altitude_high_meters altitude_mean_meters
## Min. : 1 Min. : 1 Min. : 1
## 1st Qu.: 1100 1st Qu.: 1100 1st Qu.: 1100
## Median : 1311 Median : 1350 Median : 1311
## Mean : 1751 Mean : 1799 Mean : 1775
## 3rd Qu.: 1600 3rd Qu.: 1650 3rd Qu.: 1600
## Max. :190164 Max. :190164 Max. :190164
## NA's :230 NA's :230 NA's :230
A few NA’s.
1 within quakers, and 230 in Altitude low/high/mean
Check what is happening in the rest of the data set
apply(X=is.na(coffee_ratings), MARGIN = 2, FUN = sum)
## total_cup_points species owner
## 0 0 7
## country_of_origin farm_name lot_number
## 1 359 1063
## mill ico_number company
## 315 151 209
## altitude region producer
## 226 59 231
## number_of_bags bag_weight in_country_partner
## 0 0 0
## harvest_year grading_date owner_1
## 47 0 7
## variety processing_method aroma
## 226 170 0
## flavor aftertaste acidity
## 0 0 0
## body balance uniformity
## 0 0 0
## clean_cup sweetness cupper_points
## 0 0 0
## moisture category_one_defects quakers
## 0 0 1
## color category_two_defects expiration
## 218 0 0
## certification_body certification_address certification_contact
## 0 0 0
## unit_of_measurement altitude_low_meters altitude_high_meters
## 0 230 230
## altitude_mean_meters
## 230
I will be just removing some of the columns with many missing vlaues, for instance farm_name.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.4 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Removal of columns
coffee = coffee_ratings%>%
select(-farm_name,-lot_number,-mill,-ico_number,-altitude,
-altitude_low_meters,-altitude_high_meters,-producer,-company,
-expiration,-certification_address,-owner_1,-grading_date,
-certification_contact,-unit_of_measurement)
apply(X=is.na(coffee), MARGIN = 2, FUN = sum)
## total_cup_points species owner
## 0 0 7
## country_of_origin region number_of_bags
## 1 59 0
## bag_weight in_country_partner harvest_year
## 0 0 47
## variety processing_method aroma
## 226 170 0
## flavor aftertaste acidity
## 0 0 0
## body balance uniformity
## 0 0 0
## clean_cup sweetness cupper_points
## 0 0 0
## moisture category_one_defects quakers
## 0 0 1
## color category_two_defects certification_body
## 218 0 0
## altitude_mean_meters
## 230
#view(coffee)
coffee = na.omit(coffee)
#view(coffee)
#Changing Msss to all Imperial Units of Measurements#
coffee[grep("lbs",coffee$bag_weight),]
## # A tibble: 18 x 28
## total_cup_points species owner country_of_origin region number_of_bags
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 87.2 Arabica the coff… Costa Rica san ram… 250
## 2 86.3 Arabica francisc… Costa Rica west an… 250
## 3 85.3 Arabica the coff… Costa Rica west va… 250
## 4 85.3 Arabica the coff… Costa Rica san ram… 250
## 5 84.7 Arabica fabian c… Costa Rica tarrazu 50
## 6 84.5 Arabica fabian c… Costa Rica tarrazu 250
## 7 83.8 Arabica german n… United States (Pu… yauco r… 18
## 8 83.8 Arabica the coff… Guatemala quetzal… 250
## 9 83.3 Arabica the coff… Costa Rica san ram… 250
## 10 83.3 Arabica itiah co… Haiti thiotte… 2
## 11 83 Arabica german n… United States (Pu… yauco r… 17
## 12 81.5 Arabica myriam k… Haiti dondon,… 300
## 13 81.2 Arabica essencec… Guatemala huehuet… 36
## 14 81.1 Arabica german n… United States (Pu… yauco r… 18
## 15 80.9 Arabica chris fi… Nicaragua matagal… 275
## 16 80.8 Arabica the coff… Costa Rica san ram… 250
## 17 79.3 Arabica the coff… Colombia pereira 250
## 18 79.1 Arabica german n… United States (Pu… yauco r… 18
## # … with 22 more variables: bag_weight <chr>, in_country_partner <chr>,
## # harvest_year <chr>, variety <chr>, processing_method <chr>, aroma <dbl>,
## # flavor <dbl>, aftertaste <dbl>, acidity <dbl>, body <dbl>, balance <dbl>,
## # uniformity <dbl>, clean_cup <dbl>, sweetness <dbl>, cupper_points <dbl>,
## # moisture <dbl>, category_one_defects <dbl>, quakers <dbl>, color <chr>,
## # category_two_defects <dbl>, certification_body <chr>,
## # altitude_mean_meters <dbl>
coffee = separate(data = coffee, col = bag_weight, into = c("weight", "type"), sep = " ")
coffee$weight = as.numeric(coffee$weight)
for(i in 1:length(coffee)){
if(coffee[i,8]=="kg"){
coffee[i,7] = round(coffee[i,7] * 2.20462,0)
coffee[i,8] = "lbs"
}
}
coffee = coffee%>%
select(-type)
#Changing Length to all Imperial Units of Measurements#
coffee = coffee%>%
rename(avg_altitude=altitude_mean_meters)
coffee$avg_altitude = round(coffee$avg_altitude * 3.28084,0)
#Altering androws with years with form Year1/Year2 to the intial year (Year1)#
coffee$harvest_year = substr(coffee$harvest_year,1,4)
coffee$harvest_year = as.numeric(coffee$harvest_year)
summary(coffee[,c(9,12:24,26,28)])
## harvest_year aroma flavor aftertaste acidity
## Min. :2011 Min. :5.080 Min. :6.170 Min. :6.170 Min. :5.250
## 1st Qu.:2012 1st Qu.:7.420 1st Qu.:7.330 1st Qu.:7.170 1st Qu.:7.330
## Median :2014 Median :7.580 Median :7.500 Median :7.420 Median :7.500
## Mean :2014 Mean :7.559 Mean :7.504 Mean :7.374 Mean :7.515
## 3rd Qu.:2015 3rd Qu.:7.750 3rd Qu.:7.670 3rd Qu.:7.580 3rd Qu.:7.670
## Max. :2018 Max. :8.750 Max. :8.670 Max. :8.500 Max. :8.580
## body balance uniformity clean_cup
## Min. :6.330 Min. :6.080 Min. : 6.000 Min. : 0.000
## 1st Qu.:7.330 1st Qu.:7.330 1st Qu.:10.000 1st Qu.:10.000
## Median :7.500 Median :7.500 Median :10.000 Median :10.000
## Mean :7.494 Mean :7.488 Mean : 9.871 Mean : 9.849
## 3rd Qu.:7.670 3rd Qu.:7.670 3rd Qu.:10.000 3rd Qu.:10.000
## Max. :8.420 Max. :8.580 Max. :10.000 Max. :10.000
## sweetness cupper_points moisture category_one_defects
## Min. : 1.33 Min. :5.170 Min. :0.00000 Min. : 0.0000
## 1st Qu.:10.00 1st Qu.:7.250 1st Qu.:0.10000 1st Qu.: 0.0000
## Median :10.00 Median :7.500 Median :0.11000 Median : 0.0000
## Mean : 9.93 Mean :7.459 Mean :0.09737 Mean : 0.4262
## 3rd Qu.:10.00 3rd Qu.:7.670 3rd Qu.:0.12000 3rd Qu.: 0.0000
## Max. :10.00 Max. :8.580 Max. :0.17000 Max. :31.0000
## quakers category_two_defects avg_altitude
## Min. : 0.0000 Min. : 0.000 Min. : 3
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 3609
## Median : 0.0000 Median : 2.000 Median : 4300
## Mean : 0.1521 Mean : 3.822 Mean : 6145
## 3rd Qu.: 0.0000 3rd Qu.: 5.000 3rd Qu.: 5249
## Max. :11.0000 Max. :47.000 Max. :623898
library(ggplot2)
check for outliers in some of the fields
defect1_plt = ggplot(coffee, aes(y=category_one_defects)) +
geom_boxplot()
defect2_plt = ggplot(coffee, aes(y=category_two_defects)) +
geom_boxplot()
alt_plt = ggplot(coffee, aes(y=avg_altitude)) +
geom_boxplot()
defect1_plt
defect2_plt
alt_plt
There are some outliers, but not that many that would result in a concern at this time.
Pick out some of the information that is not necessary at this point in exploration
c = coffee[,c(1:2,4,10:26,28)]
Condense the data
c.v1 = c%>%pivot_longer(
cols = !c(species, country_of_origin,variety,processing_method,color),
names_to = "Variables",
values_to = "Values")
c.v1
## # A tibble: 14,304 x 7
## species country_of_origin variety processing_method color Variables Values
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Arabica Ethiopia Other Washed / Wet Green total_cup_p… 89.9
## 2 Arabica Ethiopia Other Washed / Wet Green aroma 8.75
## 3 Arabica Ethiopia Other Washed / Wet Green flavor 8.67
## 4 Arabica Ethiopia Other Washed / Wet Green aftertaste 8.5
## 5 Arabica Ethiopia Other Washed / Wet Green acidity 8.58
## 6 Arabica Ethiopia Other Washed / Wet Green body 8.42
## 7 Arabica Ethiopia Other Washed / Wet Green balance 8.42
## 8 Arabica Ethiopia Other Washed / Wet Green uniformity 10
## 9 Arabica Ethiopia Other Washed / Wet Green clean_cup 10
## 10 Arabica Ethiopia Other Washed / Wet Green sweetness 10
## # … with 14,294 more rows
Plot the data to see overall behavior
ggplot(c.v1,aes(x=species,y=Values,color=color))+geom_boxplot()+facet_wrap(~Variables,scales = "free")
Filter out the items that have known outliers
c.v2 = c.v1 %>%
filter(Variables != 'avg_altitude' & Variables != 'category_one_defects'& Variables != 'category_two_defects')
ggplot(c.v2,aes(x=species,y=Values,color=color))+geom_boxplot()+facet_wrap(~Variables,scales = "free")
How are the cup points distributed and where the weight it is at
c.v2 %>%
filter(Variables == 'total_cup_points')%>%
ggplot(aes(x=species,y=Values,color=color))+geom_boxplot()
c.v2 %>%
filter(Variables == 'total_cup_points')%>%
ggplot(aes(x=species,y=Values,color=color))+geom_violin()
c.v2 %>%
filter(Variables == 'total_cup_points')%>%
ggplot(aes(x=processing_method,y=Values,color=color))+geom_boxplot()+
facet_wrap(~processing_method,scales = "free")
c.v2 %>%
filter(Variables == 'total_cup_points')%>%
ggplot(aes(x=processing_method,y=Values,color=color))+geom_violin()+
facet_wrap(~processing_method,scales = "free")
See the data make-up in a different view
library(formattable)
#Function for Calculating Frequency#
freqq = function(df,col_i,col_j){
a = df %>%
group_by({{col_i}},{{col_j}}) %>%
summarise(count = n()) %>%
mutate(freq = formattable::percent(count / sum(count)))
return(a)
}
#Framework of the Individual Variables#
freqq(c.v1,country_of_origin,Values)
## `summarise()` has grouped output by 'country_of_origin'. You can override using the `.groups` argument.
## # A tibble: 1,594 x 4
## # Groups: country_of_origin [28]
## country_of_origin Values count freq
## <chr> <dbl> <int> <formttbl>
## 1 Brazil 0 214 14.08%
## 2 Brazil 0.07 1 0.07%
## 3 Brazil 0.1 6 0.39%
## 4 Brazil 0.11 42 2.76%
## 5 Brazil 0.12 22 1.45%
## 6 Brazil 1 23 1.51%
## 7 Brazil 2 26 1.71%
## 8 Brazil 3 31 2.04%
## 9 Brazil 4 8 0.53%
## 10 Brazil 5 8 0.53%
## # … with 1,584 more rows
freqq(c.v1,Variables,Values)
## `summarise()` has grouped output by 'Variables'. You can override using the `.groups` argument.
## # A tibble: 611 x 4
## # Groups: Variables [16]
## Variables Values count freq
## <chr> <dbl> <int> <formttbl>
## 1 acidity 5.25 1 0.11%
## 2 acidity 6.08 1 0.11%
## 3 acidity 6.25 1 0.11%
## 4 acidity 6.5 1 0.11%
## 5 acidity 6.67 3 0.34%
## 6 acidity 6.75 2 0.22%
## 7 acidity 6.83 6 0.67%
## 8 acidity 6.92 7 0.78%
## 9 acidity 7 23 2.57%
## 10 acidity 7.08 25 2.80%
## # … with 601 more rows
Format the label (total_cup_points) to be categorical
coffee$tcp = coffee$total_cup_points
for(i in 1:894){
if(coffee[i,29] >= 80){
coffee[i,29] = 80
}
else if(coffee[i,29] >= 70 & coffee[i,29] < 80){
coffee[i,29] = 70
}
else if(coffee[i,29] >= 60 & coffee[i,29] < 70){
coffee[i,29] = 60
}
else{
coffee[i,29] = 50
}
}
coffee$tcp = round(coffee$tcp,0)
Accuracy table for comparison between models
table_accuracy = matrix(nrow=6,ncol=1)
colnames(table_accuracy) = c('Accuracy')
rownames(table_accuracy) = c('DTree','NB','SVM-Linerar','SVM-Polynomial','ANN','KNN')
table_accuracy
## Accuracy
## DTree NA
## NB NA
## SVM-Linerar NA
## SVM-Polynomial NA
## ANN NA
## KNN NA
Set seed so analysis is repeatable
set.seed(1)
For analysis
df = coffee[,c(9:22,25,29)]
for(i in 4 : 13){
df[,i]=round(df[,i],2)
}
#view(df)
df$processing_method= as.factor(df$processing_method)
df$variety = as.factor(df$variety)
df = df[,c(1:16)]
#view(df)
Simple k-fold cross validation(cv)
set.seed(1)
n = nrow(df)
folds = 10
tail = n%/%folds
set.seed(1)
rnd = runif(n)
rank = rank(rnd)
#block/chunck from cv is blk
blk = (rank-1)%/%tail+1
blk = as.factor(blk)
#to see formation of folds
print(summary(blk))
## 1 2 3 4 5 6 7 8 9 10 11
## 89 89 89 89 89 89 89 89 89 89 4
#df$variety = as.numeric(df$variety)
df$tcp = as.factor(df$tcp)
df$moisture = round(df$moisture,1)
library(rpart)
#dtree
set.seed(1)
all.acc = numeric(0)
for(i in 1:folds){
tree = rpart(tcp~.,df[blk != i,],method="class")
pred = predict(tree,df[blk==i,],type="class")
confMat = table(pred,df$tcp[blk==i])
acc = (confMat[1,1]+confMat[2,2]+confMat[3,3]+confMat[4,4])/sum(confMat)
all.acc = rbind(all.acc,acc)
}
print(mean(all.acc))
## [1] 0.9516854
table_accuracy[1,1] = mean(all.acc)
I re-formatted the label/target field and went from a binary (good/bad) grading and could not figure out why the accuracy was so low (0.003) and then looked into what the accuracy was calculating…
confMat
##
## pred 50 60 70 80
## 50 0 0 0 0
## 60 0 0 0 0
## 70 0 0 13 0
## 80 0 0 3 73
rpart.plot::rpart.plot(tree)
From this plot, I may need to re-evaluate the buckets for the label/target column.
# naive Bayes (gaussian data)
library(e1071)
set.seed(1)
all.acc = numeric(0)
for(i in 1:folds){
model = naiveBayes(tcp~.,df[blk != i,],method="class")
pred = predict(model,df[blk==i,],type="class")
confMat = table(pred,df$tcp[blk==i])
acc = (confMat[1,1]+confMat[2,2]+confMat[3,3]+confMat[4,4])/sum(confMat)
all.acc = rbind(all.acc,acc)
}
print(mean(all.acc))
## [1] 0.9550562
table_accuracy[2,1] = mean(all.acc)
#svm linear
set.seed(1)
all.acc = numeric(0)
for(i in 1:folds){
model = svm(tcp~. ,df[blk != i,],kernel="linear",type="C")
pred = predict(model,df[blk==i,],type="class")
confMat = table(pred,df$tcp[blk==i])
acc = (confMat[1,1]+confMat[2,2]+confMat[3,3]+confMat[4,4])/sum(confMat)
all.acc = rbind(all.acc,acc)
}
print(mean(all.acc))
## [1] 0.9865169
table_accuracy[3,1] = mean(all.acc)
ggplot(df,aes(x=clean_cup,y=uniformity,color=tcp))+geom_point()
#+ facet_wrap(~processing_method,scales = "free")
#svm poly
set.seed(1)
all.acc = numeric(0)
for(i in 1:folds){
model = svm(tcp~.,df[blk != i,],kernel="polynomial",type="C")
pred = predict(model,df[blk==i,],type="class")
confMat = table(pred,df$tcp[blk==i])
acc = (confMat[1,1]+confMat[2,2]+confMat[3,3]+confMat[4,4])/sum(confMat)
all.acc = rbind(all.acc,acc)
}
print(mean(all.acc))
## [1] 0.9404494
table_accuracy[4,1] = mean(all.acc)
df$tcp = round(as.numeric(df$tcp),0)
df$tcp = as.factor(df$tcp)
#ann
library(nnet)
set.seed(1)
all.acc = numeric(0)
for(i in 1:folds){
model = nnet(tcp~.,df[blk != i,], size = 11, trace=FALSE, rang=.06, decay=.006,maxit=500)
pred = predict(model, df[blk==i,],type="class")
confMat = table(factor(pred,levels=1:4),factor(df$tcp[blk==i],levels=1:4))
acc = (confMat[1,1]+confMat[2,2]+confMat[3,3]+confMat[4,4])/sum(confMat)
all.acc = rbind(all.acc,acc)
}
print(mean(all.acc))
## [1] 0.8876404
table_accuracy[5,1] = mean(all.acc)
library("NeuralNetTools")
plotnet(model,circle_cex=5,cex_val=.4,max_sp=TRUE,alpha_val=.25,skip=TRUE)
There was an (un)interesting issue with NN table, as it was dropping the first two rows as it was not forward feeding into those nodes. The following is the work around to resolve this issue.
#Before#
set.seed(1)
i=1
model = nnet(tcp~.,df[blk != i,], size = 10, trace=FALSE, wgts=.05)
pred = predict(model, df[blk==i,],type="class")
confMat = table(pred,df$tcp[blk==i])
confMat
##
## pred 1 2 3 4
## 3 1 0 16 72
#After#
set.seed(1)
i=1
model = nnet(tcp~.,df[blk != i,], size = 10, trace=FALSE, wgts=.05)
pred = predict(model, df[blk==i,],type="class")
confMat = table(factor(pred,levels=1:4),factor(df$tcp[blk==i],levels=1:4))
confMat
##
## 1 2 3 4
## 1 0 0 0 0
## 2 0 0 0 0
## 3 1 0 16 72
## 4 0 0 0 0
df$tcp = as.factor(df$tcp)
library (caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
trControl <- trainControl(method = "cv", number = 10)
knn = df[,]
model <- train(tcp ~ .,
method = "knn",
tuneGrid = expand.grid(k = 1:10),
trControl = trControl,
data = knn)
plot(model)
acc = mean(model$results$Accuracy)
table_accuracy[6,1] = acc
tab = round(table_accuracy,4)
tab
## Accuracy
## DTree 0.9517
## NB 0.9551
## SVM-Linerar 0.9865
## SVM-Polynomial 0.9404
## ANN 0.8876
## KNN 0.9285
##Easiest Example of ##
rpart.plot::rpart.plot(tree)
##Interaction Visuals##
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:formattable':
##
## style
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
fig = coffee %>%
plot_ly(
x = ~total_cup_points,
y = ~cupper_points,
color = ~color,
colors = "Accent",
#frame = ~frame,
text = ~variety,
hoverinfo = "text",
type = 'scatter',
mode = 'markers'
#showlegend=FALSE
)
fig
fig = coffee %>%
plot_ly(
x = ~total_cup_points,
y = ~cupper_points,
split = ~processing_method,
color = ~color,
colors = "Accent",
#frame = ~frame,
text = ~variety,
hoverinfo = "text",
type = 'scatter',
mode = 'markers'
#showlegend=FALSE
)
fig
fig = coffee %>%
plot_ly(
x = ~total_cup_points,
y = ~flavor,
split = ~color,
color = ~processing_method,
colors = "Accent",
#frame = ~frame,
text = ~variety,
hoverinfo = "text",
type = 'scatter',
mode = 'markers'
#showlegend=FALSE
)
fig
fig = coffee %>%
plot_ly(
x = ~total_cup_points,
y = ~clean_cup,
split = ~color,
color = ~processing_method,
colors = "Accent",
#frame = ~frame,
text = ~variety,
hoverinfo = "text",
type = 'scatter',
mode = 'markers'
#showlegend=FALSE
)
fig